
* ---- Locations -------------------------------------------------------------
* MDIR is the root of the replication package; the RAWDATA* globals are the
* raw-data folders of the three parts and are always used as
* "${mdir}/${rawdataN}/<file>".
global mdir "C:/Users/schild_j/Dropbox/CE EIP code/CE EIP replication package"
global rawdata1 "/CE EIP I/Raw Data"
global rawdata2 "/CE EIP II/Raw Data"
global rawdata3 "/CE EIP III/Raw Data"

* ---- Sample window ---------------------------------------------------------
* Raw files are named <prefix>`yr'`qtr'.xlsx; the loops below run over
* yr = yr_lb..yr_ub and qtr = 1..qtrs.
global yr_lb 19
global yr_ub 21
global qtrs 4

************************************ 
***          MEMB files          *** 
************************************

* Build MEMBER.DTA (one row per NEWID x TAX_UNIT with dependent counts)
* unless the combined file is already on disk.
capture confirm file "${mdir}/${rawdata1}/member.dta" 
if _rc == 0 {
	* Combined file found: print a notice and do nothing else
	di _n _n _n _n _n ///
	 "************************************" _n ///
	 "************************************" _n ///
	 "***          MEMBQ Note:         ***" _n ///
	 "***    Combined MEMBQ dataset    ***" _n ///
	 "***        already created       ***" _n ///
	 "************************************" _n ///
	 "************************************" _n 
}
else {
	* ---- Step 1: import and clean every quarterly raw file
	forvalues yr = $yr_lb/$yr_ub {
		forvalues qtr = 1/$qtrs {
			local raw_xlsx  "${mdir}/${rawdata1}/memi`yr'`qtr'.xlsx"
			local clean_dta "${mdir}/${rawdata1}/temp_memi`yr'`qtr'"

			* Skip quarters that have no raw file
			capture confirm file "`raw_xlsx'"
			if _rc != 0 continue

			* Skip quarters whose clean .dta file already exists
			* NOTE(review): a quarter skipped here adds nothing to the
			* `memb_dpndtu' tempfile built below - confirm that leftover
			* temp_memi files from an interrupted run cannot occur.
			capture confirm file "`clean_dta'.dta"
			if _rc == 0 continue

			import excel "`raw_xlsx'", firstrow case(lower) clear

			* Some dependents are tax filers identified by their own tax
			* unit. Keep a running list of them (NEWID, own TAX_UNIT) in a
			* tempfile that is created the first time it is needed.
			preserve 
			keep if tu_code == 3 & tax_unit != tu_dpndt
			keep newid tax_unit
			capture confirm file `memb_dpndtu'
			if _rc != 0 {
				tempfile memb_dpndtu
			}
			else {
				append using `memb_dpndtu'
			}
			save `memb_dpndtu', replace 
			restore 

			* Move each such dependent into the tax unit that claims them
			* (TU_DPNDT => TAX_UNIT)
			replace tax_unit = tu_dpndt if tu_code == 3 & tax_unit != tu_dpndt

			***** Dependent counts (TU_CODE == 3) per NEWID x TAX_UNIT *****

			* Dependents with age <= 16. Not every dependent under 17 is a
			* child of the filer (e.g. grandchildren). The count is taken
			* on the matching rows, spread to the whole tax unit with max(),
			* and set to 0 for tax units with no such dependent.
			bysort newid tax_unit: egen dpndlt17_tu = count(newid) if tu_code == 3 & age <= 16
			by newid tax_unit: ereplace dpndlt17_tu = max(dpndlt17_tu)
			replace dpndlt17_tu = 0 if missing(dpndlt17_tu)

			* Dependents of any age, built the same way
			bysort newid tax_unit: egen dpndany_tu = count(newid) if tu_code == 3 
			by newid tax_unit: ereplace dpndany_tu = max(dpndany_tu)
			replace dpndany_tu = 0 if missing(dpndany_tu)

			* Number of tax units in each CU (NEWID): flag the first row of
			* every tax unit, take the running total of the flags within
			* NEWID, then copy the last running total to every row.
			bysort newid tax_unit: gen num_tu = _n == 1
			by newid: replace num_tu = sum(num_tu) 
			by newid: replace num_tu = num_tu[_N]

			* Reduce to one row per NEWID x TAX_UNIT and tag the quarter
			keep newid tax_unit num_tu dpndlt17_tu  dpndany_tu
			duplicates drop 
			gen qtr = "`yr'`qtr'"

			save "`clean_dta'", replace
		}
	}

	* ---- Step 2: stack the clean quarterly files into one dataset
	local clean_files: dir "${mdir}/${rawdata1}" files "temp_memi*"
	local loaded 0
	foreach f of local clean_files {
		if !`loaded' {
			* the first file becomes the base dataset
			use "${mdir}/${rawdata1}/`f'",  clear
			local loaded 1
		}
		else {
			append using "${mdir}/${rawdata1}/`f'"
		}
	}

	* Save member data 
	save "${mdir}/${rawdata1}/member", replace	

	* Save the list of tax units that belong to dependents who also file
	use `memb_dpndtu', clear 
	save "${mdir}/${rawdata1}/memb_dpndtu", replace

	* ---- Step 3: remove the clean quarterly files
	foreach f of local clean_files {
		erase "${mdir}/${rawdata1}/`f'"
	}
}

************************************ 
***     TTAXQ and TTX2Q files    *** 
************************************

* Build NTAXI.DTA (filing status and AGI per NEWID x TAX_UNIT) unless the
* combined file is already on disk.
capture confirm file "${mdir}/${rawdata1}/ntaxi.dta" 
if _rc == 0 {
	* Combined file found: print a notice and do nothing else
	di _n _n _n _n _n ///
	 "************************************" _n ///
	 "************************************" _n ///
	 "***          NTAXI Note:         ***" _n ///
	 "***    Combined NTAXI dataset    ***" _n ///
	 "***        already created       ***" _n ///
	 "************************************" _n ///
	 "************************************" _n 
}
else {
	* ---- Step 1: import and clean every quarterly raw file
	forvalues yr = $yr_lb/$yr_ub {
		forvalues qtr = 1/$qtrs {
			local raw_xlsx  "${mdir}/${rawdata1}/ntaxi`yr'`qtr'.xlsx"
			local clean_dta "${mdir}/${rawdata1}/temp_ntaxi`yr'`qtr'"

			* Skip quarters that have no raw file
			capture confirm file "`raw_xlsx'"
			if _rc != 0 continue

			* Skip quarters whose clean .dta file already exists
			capture confirm file "`clean_dta'.dta"
			if _rc == 0 continue

			import excel "`raw_xlsx'", firstrow case(lower) clear

			keep newid tax_unit filestat fdagi_py fdagi_cy 

			* FILESTAT 8: tax payers who are claimed as a dependent by
			* someone else
			drop if filestat == 8

			* There must be exactly one row (one filing status) per
			* NEWID x TAX_UNIT: number the rows within each pair and
			* stop if any pair has a second row.
			bysort newid tax_unit: gen check_num_obs = _n
			quietly sum check_num_obs 
			if `r(max)' != 1 {
				quietly count if check_num_obs > 1 

				* Centre "<N> obs represent" inside the 36-character
				* banner ("***" on each side); when the padding is odd
				* the extra blank goes on the left.
				local width   = strlen("************************************")
				local textlen = strlen("`r(N)' obs represent")
				local pad     = `width' - `textlen' - 6
				local n_post  = floor(`pad'/2)
				local n_pre   = `pad' - `n_post'
				local prespace  = " "*`n_pre'
				local postspace = " "*`n_post'

				di _n _n _n _n _n ///
				"************************************" _n ///
				"************************************" _n ///
				"***         TTAXQ ERROR:         ***" _n ///
				"***" "`prespace'" "`r(N)' obs represent" "`postspace'" "***" _n ///
				"*** multiple filing statuses for ***" _n ///
				"***  NEWID TAX_UNIT combination  ***" _n ///
				"************************************" _n ///
				"************************************" _n 
				* stop the do-file here
				exit
			}
			drop check_num_obs 

			gen qtr = "`yr'`qtr'"

			save "`clean_dta'", replace
		}
	}

	* ---- Step 2: stack the clean quarterly files into one dataset
	local clean_files: dir "${mdir}/${rawdata1}" files "temp_ntaxi*"
	local loaded 0
	foreach f of local clean_files {
		if !`loaded' {
			* the first file becomes the base dataset
			use "${mdir}/${rawdata1}/`f'",  clear
			local loaded 1
		}
		else {
			append using "${mdir}/${rawdata1}/`f'"
		}
	}

	* Save tax file status data 
	save "${mdir}/${rawdata1}/ntaxi", replace	

	* ---- Step 3: remove the clean quarterly files
	foreach f of local clean_files {
		erase "${mdir}/${rawdata1}/`f'"
	}
}

************************************ 
***          Merge Data          *** 
************************************

* Build DATA_FOR_IMPUTE.DTA unless it already exists: NTAXI (filing status,
* AGI) merged 1:1 on NEWID x TAX_UNIT with MEMBER (dependent counts).
capture confirm file "${mdir}/${rawdata1}/data_for_impute.dta" 
if _rc != 0 {
	use "${mdir}/${rawdata1}/ntaxi", clear
	merge 1:1 newid tax_unit using "${mdir}/${rawdata1}/member"

	* NEWID-TAX_UNITs found only in NTAXI are acceptable only when they are
	* exactly the dependents whose own tax unit was reassigned while MEMBER
	* was built (the list saved as MEMB_DPNDTU).
	quietly count if _merge == 1
	if `r(N)' != 0 {
		preserve 
		keep if _merge == 1 
		drop _merge 
		merge 1:1 newid tax_unit using "${mdir}/${rawdata1}/memb_dpndtu"
		quietly count if _merge == 1 | _merge == 2 
		if `r(N)' != 0 {
			di _n _n _n _n _n ///
				"************************************" _n ///
				"************************************" _n ///
				"***    EIP IMPUTE DATA ERROR:    ***" _n ///
				"***       NEWIDs from TAXQ       ***" _n ///
				"***  not matched with dependent  ***" _n ///
				"*** from MEMBQ who had TU change ***" _n ///
				"************************************" _n ///
				"************************************" _n 
			* stop the do-file; the preserved data are restored automatically
			exit
		}
		else {
			di _n _n _n _n _n ///
				"************************************" _n ///
				"************************************" _n ///
				"***     EIP IMPUTE DATA NOTE:    ***" _n ///
				"***       NEWIDs from TAXQ       ***" _n ///
				"***   not matched because some   ***" _n ///
				"***   dependents are tax payers  ***" _n ///
				"*** These obs were reassigned to ***" _n ///
				"***    the TU that claims them   ***" _n ///
				"************************************" _n ///
				"************************************" _n  
			erase "${mdir}/${rawdata1}/memb_dpndtu.dta"
		}
		* RESTORE must stay inside this block, paired with the PRESERVE
		* above. It used to sit after the closing brace, so a run in which
		* every NTAXI row matched reached RESTORE with nothing preserved
		* and stopped with r(622).
		restore
	}
	* Drop the NTAXI-only rows (the reassigned dependent filers, if any)
	drop if _merge == 1 

	* Check if any NEWID-TAX_UNITs from MEMBER are unmatched 
	quietly count if _merge == 2 
	if `r(N)' != 0 {
		di _n _n _n _n _n ///
			"************************************" _n ///
			"************************************" _n ///
			"***    EIP IMPUTE DATA ERROR:    ***" _n ///
			"***      NEWIDs from MEMBER      ***" _n ///
			"***          not matched         ***" _n ///
			"************************************" _n ///
			"************************************" _n 
		* stop the do-file here
		exit
	}
	drop _merge

	* Save file status data 
	save "${mdir}/${rawdata1}/data_for_impute", replace	

	* The two inputs are no longer needed once the merged file is saved
	erase "${mdir}/${rawdata1}/ntaxi.dta"
	erase "${mdir}/${rawdata1}/member.dta"
}

************************************ 
***       Impute EIP Values      *** 
************************************

* Start from the merged tax-unit level file
use "${mdir}/${rawdata1}/data_for_impute", clear 

* FILESTAT is compared with numbers below, so make sure it is numeric
destring filestat, replace 

* Create the head-of-household filing status (3): filers coded 1 who have
* at least one dependent in their tax unit
replace filestat = 3 if dpndany_tu != 0 & filestat == 1

*** EIP 1 
* Amounts, AGI thresholds and phase-out rate used in this section
local adult      1200
local couple     2400
local child      500
local rate       0.05
local cut_single 75000
local cut_hoh    112500
local cut_joint  150000
* Amounts before phase-out when qualifying children (age <= 16) are present
local hoh_kids   `adult' + `child'*dpndlt17_tu
local joint_kids `couple' + `child'*dpndlt17_tu

capture drop eip1
generate eip1 = . 

* In every category below: the full amount applies under the AGI threshold,
* and the amount falls by `rate' per dollar of AGI at or above it.

* Single, no children 
replace eip1 = cond(fdagi_py < `cut_single', `adult', `adult' - (fdagi_py - `cut_single')*`rate') if filestat == 1

* Head-of-household, with qualifying children 
replace eip1 = cond(fdagi_py < `cut_hoh', `hoh_kids', `hoh_kids' - (fdagi_py - `cut_hoh')*`rate') if filestat == 3 & dpndlt17_tu != 0 

* Head-of-household, no qualifying children 
replace eip1 = cond(fdagi_py < `cut_hoh', `adult', `adult' - (fdagi_py - `cut_hoh')*`rate') if filestat == 3 & dpndlt17_tu == 0 & dpndany_tu != 0

* Filing jointly, no qualifying children 
replace eip1 = cond(fdagi_py < `cut_joint', `couple', `couple' - (fdagi_py - `cut_joint')*`rate') if filestat == 2 & dpndlt17_tu == 0 

* Filing jointly, with children 
replace eip1 = cond(fdagi_py < `cut_joint', `joint_kids', `joint_kids' - (fdagi_py - `cut_joint')*`rate') if filestat == 2 & dpndlt17_tu != 0 

* A fully phased-out payment is zero, never negative. The five categories
* are mutually exclusive and EIP1 is missing everywhere else, so a single
* floor covers all of them.
replace eip1 = 0 if eip1 < 0

*** EIP 2 
* Amounts, AGI thresholds and phase-out rate used in this section
local adult      600
local couple     1200
local child      600
local rate       0.05
local cut_single 75000
local cut_hoh    112500
local cut_joint  150000
* Amounts before phase-out when qualifying children (age <= 16) are present
local hoh_kids   `adult' + `child'*dpndlt17_tu
local joint_kids `couple' + `child'*dpndlt17_tu

capture drop eip2
generate eip2 = . 

* In every category below: the full amount applies under the AGI threshold,
* and the amount falls by `rate' per dollar of AGI at or above it.

* Single, no children 
replace eip2 = cond(fdagi_py < `cut_single', `adult', `adult' - (fdagi_py - `cut_single')*`rate') if filestat == 1

* Head-of-household, with qualifying children 
replace eip2 = cond(fdagi_py < `cut_hoh', `hoh_kids', `hoh_kids' - (fdagi_py - `cut_hoh')*`rate') if filestat == 3 & dpndlt17_tu != 0 

* Head-of-household, no qualifying children 
replace eip2 = cond(fdagi_py < `cut_hoh', `adult', `adult' - (fdagi_py - `cut_hoh')*`rate') if filestat == 3 & dpndlt17_tu == 0 & dpndany_tu != 0

* Filing jointly, no qualifying children 
replace eip2 = cond(fdagi_py < `cut_joint', `couple', `couple' - (fdagi_py - `cut_joint')*`rate') if filestat == 2 & dpndlt17_tu == 0 

* Filing jointly, with children 
replace eip2 = cond(fdagi_py < `cut_joint', `joint_kids', `joint_kids' - (fdagi_py - `cut_joint')*`rate') if filestat == 2 & dpndlt17_tu != 0 

* A fully phased-out payment is zero, never negative. The five categories
* are mutually exclusive and EIP2 is missing everywhere else, so a single
* floor covers all of them.
replace eip2 = 0 if eip2 < 0

*** EIP 3
* The third payment counts every dependent (DPNDANY_TU), not only those with
* age <= 16, and is phased out linearly between a lower and an upper AGI
* bound: the full amount at the lower bound, zero at the upper bound.
* The bounds are named once here so that each formula uses the same pair.
local lo_single 75000
local hi_single 80000
local lo_hoh    112500
local hi_hoh    120000
local lo_joint  150000
local hi_joint  160000
* Amounts before phase-out
local amt_single    1400
local amt_hoh       1400 + 1400*dpndany_tu
local amt_joint     2800
local amt_joint_dep 2800 + 1400*dpndany_tu

cap drop eip3
gen eip3 = . 

* Single, no children 
replace eip3 = `amt_single' if fdagi_py < `lo_single' & filestat == 1
replace eip3 = `amt_single' - (fdagi_py - `lo_single')*(`amt_single'/(`hi_single' - `lo_single')) if fdagi_py >= `lo_single' & filestat == 1 
replace eip3 = 0 if eip3 < 0 & filestat == 1

* Head-of-household, with dependent 
replace eip3 = `amt_hoh' if fdagi_py < `lo_hoh' & filestat == 3 & dpndany_tu != 0 
replace eip3 = `amt_hoh' - (fdagi_py - `lo_hoh')*((`amt_hoh')/(`hi_hoh' - `lo_hoh')) if fdagi_py >= `lo_hoh' & filestat == 3 & dpndany_tu != 0 
replace eip3 = 0 if eip3 < 0 & filestat == 3 & dpndany_tu != 0 

* Filing jointly, no dependent 
replace eip3 = `amt_joint' if fdagi_py < `lo_joint' & filestat == 2 & dpndany_tu == 0 
replace eip3 = `amt_joint' - (fdagi_py - `lo_joint')*(`amt_joint'/(`hi_joint' - `lo_joint')) if fdagi_py >= `lo_joint' & filestat == 2 & dpndany_tu == 0 
replace eip3 = 0 if eip3 < 0 & filestat == 2 & dpndany_tu == 0 

* Filing jointly, with dependent 
* The phase-out width is 160000-150000. It was previously typed as
* 160000-15000 (= 145000), which made the reduction far too small and left
* a positive payment well above 160000 of AGI.
replace eip3 = `amt_joint_dep' if fdagi_py < `lo_joint' & filestat == 2 & dpndany_tu != 0 
replace eip3 = `amt_joint_dep' - (fdagi_py - `lo_joint')*((`amt_joint_dep')/(`hi_joint' - `lo_joint')) if fdagi_py >= `lo_joint' & filestat == 2 & dpndany_tu != 0 
replace eip3 = 0 if eip3 < 0 & filestat == 2 & dpndany_tu != 0  

* Total the three imputed payments over the tax units of each CU interview
keep newid qtr eip1 eip2 eip3
collapse (sum) eip1 eip2 eip3, by(newid qtr)

* ---- Wide copy: one row per ID (first 6 characters of NEWID) with one set
* ---- of EIP columns per interview number (remaining characters of NEWID)
tempfile wide_eip
preserve 
tostring newid, replace 
gen id = substr(newid,1,6)
gen intrv = substr(newid,7,.)
keep id intrv eip1 eip2 eip3
destring intrv, replace
reshape wide eip1 eip2 eip3, i(id) j(intrv)
save `wide_eip'
restore 

* ---- Attach the wide columns to every NEWID that shares the ID
keep newid qtr
tostring newid, replace 
gen id = substr(newid,1,6) 
merge m:1 id using `wide_eip'

* Every row must match: both sides come from the same list of NEWIDs
quietly count if _merge != 3
if `r(N)' != 0 {
	di _n _n _n _n _n ///
		"************************************" _n ///
		"************************************" _n ///
		"***     EIP_IMPUTATION ERROR:    ***" _n ///
		"***    NEWIDs in USING or HOLD   ***" _n ///
		"***          not matched         ***" _n ///
		"************************************" _n ///
		"************************************" _n 
	* stop the do-file here
	exit
}
drop _merge id
drop qtr 

* For interviews 1-4 and payments 1-3: an interview that is absent for an ID
* gets 0 instead of missing, and the variable label becomes the column
* header used in the Excel export.
forvalues k = 1/4 {
	foreach p in 1 2 3 {
		replace eip`p'`k' = 0 if eip`p'`k' == .
		label var eip`p'`k' imp_eip`p'_`k'
	}
}

* Save dataset as XLSX in each of the three raw-data folders
forvalues s = 1/3 {
	export excel "${mdir}/${rawdata`s'}/imputed_eip.xlsx", firstrow(varlabels) sheet("output1") replace 
}
